## "China's Ideological Spectrum"
## by Jennifer Pan and Yiqing Xu

#############################
## CFA analysis
#############################

## 1. Estimating three Models
## 2. Bootstrapping
## 3. Present estimates

### Install Packages
## install.packages("lavaan")
## install.packages("ggplot2")
## install.packages("GGally")
## install.packages("semPlot")

###################################
## 1. Estimating three Models
####################################


### WARNING - this part takes fairly long processing time
### Results can be directly loaded from intermediate file (line 102)

## rm(list=ls(all=TRUE)) 
## library(lavaan)

## ## load data
## load("data/sample10K.RData")
## names(d)
## table(d$year)

## ## Grouping survey questions
## questions <- paste("q", 1:50, sep = "")
## f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism
## f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market
## f3 <- c(32, 41, 42, 43, 46, 47, 48, 49, 50) # traditional values
## f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism
## f5 <- c(24, 6, 7, 8, 44, 45) # individual freedom
## f6 <- c(23, 26, 28, 35, 39) # national interest
## f7 <- c(22, 31, 33, 34, 36, 38) # social justice
## FSet <- list(f1, f2, f3, f4, f5, f6, f7)


## ## a function that generates lavaan model syntax from a list of nubmers
## genModel <- function(model.list) {
##     model <- ""
##     for (j in 1:length(model.list)) {
##         model <- paste(model,
##                        paste('factor',j,' =~ ',
##                              paste(paste('q', model.list[[j]], sep=""),
##                                    collapse = " + "), sep = ""),
##                        "\n", sep = "") 
##     }
##     return(model)
## }

## ### Model A -- Political / Economic / Social:
## modelA <- list(sort(unique(c(f1,f5))),
##                sort(unique(c(f2,f3,f6,f7))),
##                sort(unique(c(f4))))
## fitA <- cfa(genModel(modelA), data=d[,questions], ordered = questions, std.lv = TRUE)
## inspect(fitA,"cov.lv")

## ### Model B -- Political + Nationalism / Economic + Social:
## modelB <- list(sort(unique(c(f1,f4,f5))),
##                sort(unique(c(f2,f3,f6,f7))))
## fitB <- cfa(genModel(modelB), data=d[,questions], ordered = questions, std.lv = TRUE)
## inspect(fitB,"cov.lv")

## ### Model C -- One Dimensional Model:
## modelC <- genModel(list(1:50))
## fitC <- cfa(modelC, data=d[,questions], ordered = questions, std.lv = TRUE)

## ## Now we look at the meanings of the latent factors
## summary(fitA)
## parameterEstimates(fitA, ci = FALSE, standardize = TRUE)

## ## standardized estimates
## lt <- lavPredict(fitA)
## d$lt1 <- lt[,1]     # political liberalism
## d$lt2 <- lt[,2]*-1  # market and western ideas
## d$lt3 <- lt[,3]*-1  # anti-nationalism
## coef <- parameterEstimates(fitA, ci = FALSE, standardize = TRUE)$std.all[1:50]
## coef[(length(modelA[[1]])+1):50] <- coef[(length(modelA[[1]])+1):50] * (-1)

## ## factor1: 1-18 (18);
## ## factor2: 19-43 (25);
## ## factor3: 44-50 (7)

## ## PCA
## pca.out<-prcomp(d[,questions], center=TRUE,scale=TRUE)
## d$PC1 <- pca.out$x[,1] * -1 

## save(d, coef, questions, f1, f2, f3, f4, f5, f6, f7, FSet, 
##      genModel, modelA, fitA, fitB, fitC, pca.out, file = "output/result_cfa.RData")

############################
## Figure 9: correlations 
############################

rm(list=ls(all=TRUE)) 
load("output/result_cfa.RData")

library(ggplot2)
library(GGally)
p <- ggpairs(d[,c("lt1","lt2","lt3","PC1")],
             columnLabels = c("Political Liberalism",
                              "Pro-Market/Non-Traditional Values",
                              "Nationalism * (-1)","1st Principal Component"),
             lower = list(continuous = wrap("points", alpha = 0.3, size=0.2),
                          combo = wrap("dot", alpha = 0.4, size=0.2)),
             upper = list(continuous = wrap("cor", size = 8, colour = 1)))  +
    theme(axis.text = element_text(size = 14, vjust = 1, color = "black"),
          axis.title =element_text(size=18, face="bold")) 
pdf("graphs/cfa_latents.pdf", height= 9, width = 9)
print(p, left = 0.2, bottom = 0.2)
graphics.off()

## PNG has smaller file size
png("graphs/cfa_latents.png", height= 700, width = 700)
print(p, left = 0.2, bottom = 0.2)
graphics.off()

###########################
## Figure A1: Model Setup
###########################

rm(list=ls(all=TRUE)) 
load("output/result_cfa.RData")

q1 <- paste("q", modelA[[1]], sep = "")
q2 <- paste("q", modelA[[2]], sep = "")
q3 <- paste("q", modelA[[3]], sep = "")

library(lavaan)
library(ggplot2)
library(semPlot)

fitA <- cfa(genModel(modelA), data=d[,questions], std.lv = TRUE)
semPaths(fitA, what = "path", edge.label.cex = 0.4,
         node.width = c(rep(0.35,50), rep(0.8, 3)),
         nodeLabels = c(unlist(modelA),"fc1","fc2","fc3"),
         height = 3.5, width = 8,
         filename = "graphs/cfa_path", filetype = "png")


####################################
## Figure A2: Correlations with PCA
####################################

pca1.pc <- scale(prcomp(d[, q1], center = TRUE, scale = TRUE)$x[, 1])
pca2.pc <- scale(prcomp(d[, q2], center = TRUE, scale = TRUE)$x[, 1])
pca3.pc <- scale(prcomp(d[, q3], center = TRUE, scale = TRUE)$x[, 1])

pdf("graphs/cfa_robust1.pdf")
par(mar = c(4, 4, 2, 2))
plot(1, type = "n", xlab = "", ylab = "",
     xlim = c(-3.5,3.5), ylim = c(-3.5,3.5))
abline(h = seq(-4,4,0.25), col = "#AAAAAA50")
abline(v = seq(-4,4,0.25), col = "#AAAAAA50")
points(d$lt1, pca1.pc, cex = 0.5, pch = 16, col = "gray30")
text(0,3.2,paste("Corr =", round(cor(d$lt1, pca1.pc),3)), cex = 2, font = 3)
mtext("Latent Factor 1", 1, 2.5, cex = 1.5)
mtext("PC1 from PCA(a)", 2, 2.5, cex = 1.5)
graphics.off()

pdf("graphs/cfa_robust2.pdf")
par(mar = c(4, 4, 2, 2))
plot(1, type = "n", xlab = "", ylab = "",
     xlim = c(-3.5,3.5), ylim = c(-3.5,3.5))
abline(h = seq(-4,4,0.25), col = "#AAAAAA50")
abline(v = seq(-4,4,0.25), col = "#AAAAAA50")
points(d$lt2, pca2.pc, cex = 0.5, pch = 16, col = "gray30")
text(0,3.2,paste("Corr =", round(cor(d$lt2, pca2.pc),3)), cex = 2, font = 3)
mtext("Latent Factor 2", 1, 2.5, cex = 1.5)
mtext("PC1 from PCA(b)", 2, 2.5, cex = 1.5)
graphics.off()

pdf("graphs/cfa_robust3.pdf")
par(mar = c(4, 4, 2, 2))
plot(1, type = "n", xlab = "", ylab = "",
     xlim = c(-3.5,3.5), ylim = c(-3.5,3.5))
abline(h = seq(-4,4,0.25), col = "#AAAAAA50")
abline(v = seq(-4,4,0.25), col = "#AAAAAA50")
points(d$lt3, pca3.pc, cex = 0.5, pch = 16, col = "gray30")
text(0,3.2,paste("Corr =", round(cor(d$lt3, pca3.pc),3)), cex = 2, font = 3)
mtext("Latent Factor 3", 1, 2.5, cex = 1.5)
mtext("PC1 from PCA(c)", 2, 2.5, cex = 1.5)
graphics.off()

### correlations of PCs
## It's possible that the CFA model over-estimates
## the correlations of the latent variables

## plot(pca1.pc, pca2.pc)
## plot(pca1.pc, pca3.pc)
## plot(pca2.pc, pca3.pc)

## cor(pca1.pc, pca2.pc) ## 0.69
## cor(pca1.pc, pca3.pc) ## 0.75
## cor(pca2.pc, pca3.pc) ## 0.72


###################################
## 2. Bootstrapping (slow)
####################################

## rm(list=ls(all=TRUE)) 
## load("output/result_cfa.RData")
## questions <- paste("q", 1:50, sep = "")
## data <- d[, questions]

## library(lavaan)
## library(doParallel)
## library(foreach)
## set.seed(02139)
## cl<-makeCluster(20) 
## registerDoParallel(cl)
## sims <- 200
## ndim <- 3
## nobs <- dim(d)[1]
## mod1 <- genModel(modelA)
## output <- foreach (i = 1:sims, .combine = c,
##                    .inorder = FALSE,
##                    .packages = c("lavaan")) %dopar% { 
##                        fit <- cfa(mod1, data=data[sample(1:nobs, nobs, replace = TRUE),],
##                                   std.lv = TRUE, ordered = questions)
##                        para <- parameterEstimates(fit, ci = FALSE,
##                                                   standardize = TRUE)$std.all[1:50]
##                        lt <- lavPredict(fit, newdata = data)
##                        return(list(para, lt))
## }
## stopCluster(cl)
## ## flip signs
## result.para <- matrix(NA, 50, sims)
## result.lv <- array(NA, dim = c(nobs, ndim, sims))
## for (i in 1:sims) {
##     result.para[, i] <- output[[(i-1)*2+1]]
##     result.lv[, , i] <- output[[i*2]] 
## }
## result.para[17:50,] <- result.para[17:50,] * (-1)
## result.lv[, 2:3, ] <- result.lv[, 2:3, ] * (-1)
## save(result.para, result.lv, file = "output/result_cfa_boot.RData")


###################################
## 3. Present Estimates
####################################


rm(list=ls(all=TRUE)) 
load("output/result_cfa.RData")
load("output/result_cfa_boot.RData")

a1 <- length(modelA[[1]])
a2 <- length(modelA[[2]])
a3 <- length(modelA[[3]])
qorder <- unlist(modelA)
ft <- rep(NA, 50) # original grouping
for (i in 1:50) {
    for (j in 1:7) {
        if (qorder[i]%in%FSet[[j]]) {
            ft[i] <- j
        }
    } 
}

## Table A3: Estimates
est.out <- cbind.data.frame("No." = qorder,
                            "L.V." = c(rep(1,a1),rep(2,a2),rep(3,a3)), 
                            "factor" = ft,
                            "Est." = coef,
                            "S.E." = apply(result.para, 1, sd),
                            "CI_lower" = apply(result.para, 1, quantile, 0.025),
                            "CI_upper" = apply(result.para, 1, quantile, 0.975))
library(xtable)
est.out2 <- matrix(NA, max(a1,a2,a3),11)
est.out2[1:a1,1:3] <- as.matrix(est.out[1:a1,c(1,4,5)])
est.out2[1:a2,5:7] <- as.matrix(est.out[(a1+1):(a1+a2),c(1,4,5)])
est.out2[1:a3,9:11] <- as.matrix(est.out[(a1+a2+1):50,c(1,4,5)])
colnames(est.out2) <- rep(c("No.","Coef.","S.E.",""),3)[-12]
print(xtable(est.out2, digits = c(0,0,3,3,0,0,3,3,0,0,3,3)),
      include.rownames = FALSE, file = "output/cfa_est.txt")


## Figures 6-8
qcontent2 <- c(
    "People should not have universal suffrage if they have not been educated about democracy.",
    "Universality of human rights take precedence over sovereignty.",
    "When events that have major repercussions for the safety and security occur, the government should\n freely disseminate information even if information disclosure increases the risks of unrest.",
    "Western multiparty systems are unsuitable for China in its current state.",
    "Indiscriminately imitating (systems of) western-style freedom of speech will lead to\n social disorder in China.",
    "It is preferable to let universities recruit students by themselves than to have a unified national\n college entrance examination system.",
    "Religoius adherents should be allowed to conduct missionary  work in\n nonreligious spaces.",
    "Primary school, secondary school, and college students should all participate in government\n organized military training.",
    "National unity and territorial integrity are the highest interest of society.",
    "Even if procedural rules are violated in the process of investigation and evidence gathering, those\n who have actually committed crimes should be punished.",
    "The state has an obligation to provide foreign aid.",
    "It is acceptable to besmirch the images of national leaders and founding leaders in literary and\n artistic works.",
    "When laws fail to fully constrain criminal behaviors, people have the right to impose their own\n punishments for these behaviors.",
    "Media should be allowed to represent the voice of a particular social stratum or interest group.",
    "If it has sufficient state capabilities, China has the right to take any action to defend its\n national interests.",
    "Force should be used to reunify Taiwan with China if conditions permit.",
    "Lawyers should do their utmost to defend clients even if the client has committed a crime.",
    "Chinese citizens should be allowed to hold foreign citizenship.",
    "It is impossible for western countries led by the United States to tolerate the rise of China\n into a major power.",
    "The state should take measures to train and support athletes so they can win glory for the country in\n various international competitions.",
    "The minimum wage should be set by the state.",
    "The fruits of China's economic development since reform and opening up are enjoyed by a small\n group of people; most people have not received much benefit.",
    "In the decision-making of major (infrastructure) projects, individual interests should give way\n to social interests.",
    "Wasting food is an individual freedom.",
    "If the price of pork is too high, the government should intervene.",
    "A high tariff should be imposed on imported goods that are also produced domestically to\n protect domestic industries.",
    "Education should be public to the greatest extent.",
    "The interests of state-owned enterprises are part of the national interest.",
    "Attempting to control real estate prices will undermine economic development.",
    "The primary means to improve the lives of the low-income people is to give them fiscal\n subsidies and support.",
    "A rich person deserves better medical services.",
    "High income earners should disclose the sources of their income.",
    "People who make money through gains from financial investments contribute less to then society\n than people make money through labor.",
    "It is better to sell state-owned enterprises to capitalists than to let them go bankrupt.",
    "Sectors related to national security and important to the national economy and people's\n livelihoods must be controlled by state-owned enterprises.",
    "The process of capital accumulation is always accompanied by harm to the working class.",
    "Individuals should be able to own, buy and sell land.",
    "The government should adopt higher grain purchasing prices to boost the income of peasants.",
    "Foreign capital in China should enjoy the same treatment as national capital.",
    "Natural monopolies that emerge out of market competitions are harmless.",
    "Two adults should be free to engage in voluntary sexual behavior regardless of their marital status.",
    "One should not openly comment on the shortcomings of their elders.",
    "The modern Chinese society needs Confucianism.",
    "The fundamental standard to evaluate the value of a work of art is whether it is liked by the masses.",
    "Even with population pressures, the state and the society have no right to interfere in the decision\n to have a child, or how many children to have.",
    "The Eight Diagrams (Bagua) in The Book of Changes (Zhouyi) can explain many things well.",
    "The perspective of traditional Chinese medicine on human health is superior to that of modern\n mainstream medical science.",
    "It is unnecessary to push forward the simplification of Chinese characters.",
    "Traditional Chinese classics should be the basic education material for children.",
    "I will recognize the relationship between my child and a same-sex partner if it is a voluntary choice." 
)


## plotting
ft1 <- est.out[1:a1, ]
ft2 <- est.out[(a1+1):(a1+a2),]
ft3 <- est.out[(a1+a2+1):50,]
draw <- rbind(ft1[order(ft1[,4], decreasing = TRUE),],
              ft2[order(ft2[,4], decreasing = TRUE),],
              ft3[order(ft3[,4], decreasing = TRUE),])

pdf("graphs/cfa_est_factor1.pdf",height = 9, width = 12)
par(mar = c(4,39,1,1), las = 1)
num <- 1:a1
plot(1, type = "n", xlim = c(-0.8, 0.8), ylim = c(a1,1),
     xlab = "", ylab = "", axes = FALSE, cex.axis = 1.5)
abline(h = 0:51, col = "gray90", cex = 0.8)
abline(v = seq(-0.8,0.8,0.1), col = "gray90", cex = 0.8)
abline(v = 0, col = "gray90", lwd = 5)
box(); axis(1, at = seq(-0.8, 0.8, 0.4))
for (i in num) {
   points(draw[i, 4], i,  pch =16, col = 1, cex = 1.2)
   lines(draw[i,6:7], c(i,i), col = 1, lwd = 4)
}
axis(2, at = 1:length(num), labels = qcontent2[draw[num,1]], cex.axis = 1)
mtext("Coefficient",1,2.5,cex = 1.5)
text(0.2, 1.5, "Latent Factor 1", cex = 2)
graphics.off()

pdf("graphs/cfa_est_factor2.pdf",height = 12, width = 12)
par(mar = c(4,39,1,1), las = 1)
num <- (a1+1):(a1+a2)
plot(1, type = "n", xlim = c(-0.8, 0.8), ylim = c((a1+a2), (a1+1)),
     xlab = "", ylab = "", axes = FALSE, cex.axis = 1.5)
abline(h = 0:51, col = "gray90", cex = 0.8)
abline(v = seq(-0.8,0.8,0.1), col = "gray90", cex = 0.8)
abline(v = 0, col = "gray90", lwd = 5)
box(); axis(1, at = seq(-0.8, 0.8, 0.4))
for (i in num) { 
    points(draw[i, 4], i, col = 1, pch = 16, cex = 1.2)
    lines(draw[i,6:7], c(i,i), col = 1, pch = 16, lwd = 3)
}
axis(2, at = num, labels = qcontent2[draw[num,1]], cex.axis = 1)
mtext("Coefficient",1,2.5,cex = 1.5)
text(0.2, 41.5, "Latent Factor 2", cex = 2)
graphics.off()

pdf("graphs/cfa_est_factor3.pdf",height = 4, width = 12)
par(mar = c(4,39,1,1), las = 1)
num <- (a1+a2+1):50
plot(1, type = "n", xlim = c(-0.8, 0.8), ylim = c(50.5, (a1+a2+0.5)),
     xlab = "", ylab = "", axes = FALSE, cex.axis = 1.5)
abline(h = 0:51, col = "gray90", cex = 0.8)
abline(v = seq(-0.8,0.8,0.1), col = "gray90", cex = 0.8)
abline(v = 0, col = "gray90", lwd = 5)
box(); axis(1, at = seq(-0.8, 0.8, 0.4))
for (i in num) { 
    points(draw[i, 4], i, col = 1, pch = 16, cex = 1.2)
    lines(draw[i,6:7], c(i,i), col = 1, pch = 16, lwd = 3)
}
axis(2, at = num, labels = qcontent2[draw[num,1]], cex.axis = 1)
mtext("Coefficient",1,2.5,cex = 1.5)
text(0.2, 49.5, "Latent Factor 3", cex = 2)
graphics.off()

